{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bbc7afbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "import warnings\n",
    "from pathlib import Path\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "import lightgbm as lgb\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import xgboost as xgb\n",
    "from IPython import get_ipython\n",
    "from IPython.display import display, clear_output\n",
    "from scipy.special import jn\n",
    "from sklearn import linear_model\n",
    "from sklearn import preprocessing\n",
    "from sklearn.decomposition import PCA,FastICA,FactorAnalysis,SparsePCA\n",
    "from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "from sklearn.model_selection import GridSearchCV,cross_val_score,StratifiedKFold,train_test_split\n",
    "from sklearn.svm import SVR\n",
    "\n",
    "get_ipython().run_line_magic('matplotlib', 'inline')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "237cb4f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train data shape: (150000, 31)\n",
      "TestA data shape: (50000, 30)\n"
     ]
    }
   ],
   "source": [
    "# Dataset directory: set the USED_CAR_DATA_DIR environment variable to point at\n",
    "# your copy of the data; the fallback is the folder this notebook was written against.\n",
    "DATA_DIR = Path(os.environ.get('USED_CAR_DATA_DIR', r\"D:\\notebook\\caruse\\dataset\"))\n",
    "\n",
    "# Both files are read with a single space as the column delimiter.\n",
    "Train_data = pd.read_csv(DATA_DIR / 'used_car_train_20200313.csv', sep=' ')\n",
    "TestA_data = pd.read_csv(DATA_DIR / 'used_car_testA_20200421.csv', sep=' ')\n",
    "print('Train data shape:',Train_data.shape)\n",
    "print('TestA data shape:',TestA_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "dba45335",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>name</th>\n",
       "      <th>regDate</th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>bodyType</th>\n",
       "      <th>fuelType</th>\n",
       "      <th>gearbox</th>\n",
       "      <th>power</th>\n",
       "      <th>kilometer</th>\n",
       "      <th>...</th>\n",
       "      <th>v_5</th>\n",
       "      <th>v_6</th>\n",
       "      <th>v_7</th>\n",
       "      <th>v_8</th>\n",
       "      <th>v_9</th>\n",
       "      <th>v_10</th>\n",
       "      <th>v_11</th>\n",
       "      <th>v_12</th>\n",
       "      <th>v_13</th>\n",
       "      <th>v_14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>736</td>\n",
       "      <td>20040402</td>\n",
       "      <td>30.0</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>60</td>\n",
       "      <td>12.5</td>\n",
       "      <td>...</td>\n",
       "      <td>0.235676</td>\n",
       "      <td>0.101988</td>\n",
       "      <td>0.129549</td>\n",
       "      <td>0.022816</td>\n",
       "      <td>0.097462</td>\n",
       "      <td>-2.881803</td>\n",
       "      <td>2.804097</td>\n",
       "      <td>-2.420821</td>\n",
       "      <td>0.795292</td>\n",
       "      <td>0.914762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2262</td>\n",
       "      <td>20030301</td>\n",
       "      <td>40.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.264777</td>\n",
       "      <td>0.121004</td>\n",
       "      <td>0.135731</td>\n",
       "      <td>0.026597</td>\n",
       "      <td>0.020582</td>\n",
       "      <td>-4.900482</td>\n",
       "      <td>2.096338</td>\n",
       "      <td>-1.030483</td>\n",
       "      <td>-1.722674</td>\n",
       "      <td>0.245522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>14874</td>\n",
       "      <td>20040403</td>\n",
       "      <td>115.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>163</td>\n",
       "      <td>12.5</td>\n",
       "      <td>...</td>\n",
       "      <td>0.251410</td>\n",
       "      <td>0.114912</td>\n",
       "      <td>0.165147</td>\n",
       "      <td>0.062173</td>\n",
       "      <td>0.027075</td>\n",
       "      <td>-4.846749</td>\n",
       "      <td>1.803559</td>\n",
       "      <td>1.565330</td>\n",
       "      <td>-0.832687</td>\n",
       "      <td>-0.229963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>71865</td>\n",
       "      <td>19960908</td>\n",
       "      <td>109.0</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>193</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.274293</td>\n",
       "      <td>0.110300</td>\n",
       "      <td>0.121964</td>\n",
       "      <td>0.033395</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-4.509599</td>\n",
       "      <td>1.285940</td>\n",
       "      <td>-0.501868</td>\n",
       "      <td>-2.438353</td>\n",
       "      <td>-0.478699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>111080</td>\n",
       "      <td>20120103</td>\n",
       "      <td>110.0</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.228036</td>\n",
       "      <td>0.073205</td>\n",
       "      <td>0.091880</td>\n",
       "      <td>0.078819</td>\n",
       "      <td>0.121534</td>\n",
       "      <td>-1.896240</td>\n",
       "      <td>0.910783</td>\n",
       "      <td>0.931110</td>\n",
       "      <td>2.834518</td>\n",
       "      <td>1.923482</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID    name   regDate  model  brand  bodyType  fuelType  gearbox  power  \\\n",
       "0       0     736  20040402   30.0      6       1.0       0.0      0.0     60   \n",
       "1       1    2262  20030301   40.0      1       2.0       0.0      0.0      0   \n",
       "2       2   14874  20040403  115.0     15       1.0       0.0      0.0    163   \n",
       "3       3   71865  19960908  109.0     10       0.0       0.0      1.0    193   \n",
       "4       4  111080  20120103  110.0      5       1.0       0.0      0.0     68   \n",
       "\n",
       "   kilometer  ...       v_5       v_6       v_7       v_8       v_9      v_10  \\\n",
       "0       12.5  ...  0.235676  0.101988  0.129549  0.022816  0.097462 -2.881803   \n",
       "1       15.0  ...  0.264777  0.121004  0.135731  0.026597  0.020582 -4.900482   \n",
       "2       12.5  ...  0.251410  0.114912  0.165147  0.062173  0.027075 -4.846749   \n",
       "3       15.0  ...  0.274293  0.110300  0.121964  0.033395  0.000000 -4.509599   \n",
       "4        5.0  ...  0.228036  0.073205  0.091880  0.078819  0.121534 -1.896240   \n",
       "\n",
       "       v_11      v_12      v_13      v_14  \n",
       "0  2.804097 -2.420821  0.795292  0.914762  \n",
       "1  2.096338 -1.030483 -1.722674  0.245522  \n",
       "2  1.803559  1.565330 -0.832687 -0.229963  \n",
       "3  1.285940 -0.501868 -2.438353 -0.478699  \n",
       "4  0.910783  0.931110  2.834518  1.923482  \n",
       "\n",
       "[5 rows x 31 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the training frame.\n",
    "Train_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e7f843dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 150000 entries, 0 to 149999\n",
      "Data columns (total 31 columns):\n",
      " #   Column             Non-Null Count   Dtype  \n",
      "---  ------             --------------   -----  \n",
      " 0   SaleID             150000 non-null  int64  \n",
      " 1   name               150000 non-null  int64  \n",
      " 2   regDate            150000 non-null  int64  \n",
      " 3   model              149999 non-null  float64\n",
      " 4   brand              150000 non-null  int64  \n",
      " 5   bodyType           145494 non-null  float64\n",
      " 6   fuelType           141320 non-null  float64\n",
      " 7   gearbox            144019 non-null  float64\n",
      " 8   power              150000 non-null  int64  \n",
      " 9   kilometer          150000 non-null  float64\n",
      " 10  notRepairedDamage  150000 non-null  object \n",
      " 11  regionCode         150000 non-null  int64  \n",
      " 12  seller             150000 non-null  int64  \n",
      " 13  offerType          150000 non-null  int64  \n",
      " 14  creatDate          150000 non-null  int64  \n",
      " 15  price              150000 non-null  int64  \n",
      " 16  v_0                150000 non-null  float64\n",
      " 17  v_1                150000 non-null  float64\n",
      " 18  v_2                150000 non-null  float64\n",
      " 19  v_3                150000 non-null  float64\n",
      " 20  v_4                150000 non-null  float64\n",
      " 21  v_5                150000 non-null  float64\n",
      " 22  v_6                150000 non-null  float64\n",
      " 23  v_7                150000 non-null  float64\n",
      " 24  v_8                150000 non-null  float64\n",
      " 25  v_9                150000 non-null  float64\n",
      " 26  v_10               150000 non-null  float64\n",
      " 27  v_11               150000 non-null  float64\n",
      " 28  v_12               150000 non-null  float64\n",
      " 29  v_13               150000 non-null  float64\n",
      " 30  v_14               150000 non-null  float64\n",
      "dtypes: float64(20), int64(10), object(1)\n",
      "memory usage: 35.5+ MB\n"
     ]
    }
   ],
   "source": [
    "# Per-column dtype and non-null count; shows which columns contain missing values.\n",
    "Train_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c8341c82",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>name</th>\n",
       "      <th>regDate</th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>bodyType</th>\n",
       "      <th>fuelType</th>\n",
       "      <th>gearbox</th>\n",
       "      <th>power</th>\n",
       "      <th>kilometer</th>\n",
       "      <th>...</th>\n",
       "      <th>v_5</th>\n",
       "      <th>v_6</th>\n",
       "      <th>v_7</th>\n",
       "      <th>v_8</th>\n",
       "      <th>v_9</th>\n",
       "      <th>v_10</th>\n",
       "      <th>v_11</th>\n",
       "      <th>v_12</th>\n",
       "      <th>v_13</th>\n",
       "      <th>v_14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>1.500000e+05</td>\n",
       "      <td>149999.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>145494.000000</td>\n",
       "      <td>141320.000000</td>\n",
       "      <td>144019.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "      <td>150000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>74999.500000</td>\n",
       "      <td>68349.172873</td>\n",
       "      <td>2.003417e+07</td>\n",
       "      <td>47.129021</td>\n",
       "      <td>8.052733</td>\n",
       "      <td>1.792369</td>\n",
       "      <td>0.375842</td>\n",
       "      <td>0.224943</td>\n",
       "      <td>119.316547</td>\n",
       "      <td>12.597160</td>\n",
       "      <td>...</td>\n",
       "      <td>0.248204</td>\n",
       "      <td>0.044923</td>\n",
       "      <td>0.124692</td>\n",
       "      <td>0.058144</td>\n",
       "      <td>0.061996</td>\n",
       "      <td>-0.001000</td>\n",
       "      <td>0.009035</td>\n",
       "      <td>0.004813</td>\n",
       "      <td>0.000313</td>\n",
       "      <td>-0.000688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>43301.414527</td>\n",
       "      <td>61103.875095</td>\n",
       "      <td>5.364988e+04</td>\n",
       "      <td>49.536040</td>\n",
       "      <td>7.864956</td>\n",
       "      <td>1.760640</td>\n",
       "      <td>0.548677</td>\n",
       "      <td>0.417546</td>\n",
       "      <td>177.168419</td>\n",
       "      <td>3.919576</td>\n",
       "      <td>...</td>\n",
       "      <td>0.045804</td>\n",
       "      <td>0.051743</td>\n",
       "      <td>0.201410</td>\n",
       "      <td>0.029186</td>\n",
       "      <td>0.035692</td>\n",
       "      <td>3.772386</td>\n",
       "      <td>3.286071</td>\n",
       "      <td>2.517478</td>\n",
       "      <td>1.288988</td>\n",
       "      <td>1.038685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.991000e+07</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-9.168192</td>\n",
       "      <td>-5.558207</td>\n",
       "      <td>-9.639552</td>\n",
       "      <td>-4.153899</td>\n",
       "      <td>-6.546556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>37499.750000</td>\n",
       "      <td>11156.000000</td>\n",
       "      <td>1.999091e+07</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>12.500000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.243615</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.062474</td>\n",
       "      <td>0.035334</td>\n",
       "      <td>0.033930</td>\n",
       "      <td>-3.722303</td>\n",
       "      <td>-1.951543</td>\n",
       "      <td>-1.871846</td>\n",
       "      <td>-1.057789</td>\n",
       "      <td>-0.437034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>74999.500000</td>\n",
       "      <td>51638.000000</td>\n",
       "      <td>2.003091e+07</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>110.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.257798</td>\n",
       "      <td>0.000812</td>\n",
       "      <td>0.095866</td>\n",
       "      <td>0.057014</td>\n",
       "      <td>0.058484</td>\n",
       "      <td>1.624076</td>\n",
       "      <td>-0.358053</td>\n",
       "      <td>-0.130753</td>\n",
       "      <td>-0.036245</td>\n",
       "      <td>0.141246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>112499.250000</td>\n",
       "      <td>118841.250000</td>\n",
       "      <td>2.007111e+07</td>\n",
       "      <td>66.000000</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>150.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.265297</td>\n",
       "      <td>0.102009</td>\n",
       "      <td>0.125243</td>\n",
       "      <td>0.079382</td>\n",
       "      <td>0.087491</td>\n",
       "      <td>2.844357</td>\n",
       "      <td>1.255022</td>\n",
       "      <td>1.776933</td>\n",
       "      <td>0.942813</td>\n",
       "      <td>0.680378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>149999.000000</td>\n",
       "      <td>196812.000000</td>\n",
       "      <td>2.015121e+07</td>\n",
       "      <td>247.000000</td>\n",
       "      <td>39.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>19312.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.291838</td>\n",
       "      <td>0.151420</td>\n",
       "      <td>1.404936</td>\n",
       "      <td>0.160791</td>\n",
       "      <td>0.222787</td>\n",
       "      <td>12.357011</td>\n",
       "      <td>18.819042</td>\n",
       "      <td>13.847792</td>\n",
       "      <td>11.147669</td>\n",
       "      <td>8.658418</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              SaleID           name       regDate          model  \\\n",
       "count  150000.000000  150000.000000  1.500000e+05  149999.000000   \n",
       "mean    74999.500000   68349.172873  2.003417e+07      47.129021   \n",
       "std     43301.414527   61103.875095  5.364988e+04      49.536040   \n",
       "min         0.000000       0.000000  1.991000e+07       0.000000   \n",
       "25%     37499.750000   11156.000000  1.999091e+07      10.000000   \n",
       "50%     74999.500000   51638.000000  2.003091e+07      30.000000   \n",
       "75%    112499.250000  118841.250000  2.007111e+07      66.000000   \n",
       "max    149999.000000  196812.000000  2.015121e+07     247.000000   \n",
       "\n",
       "               brand       bodyType       fuelType        gearbox  \\\n",
       "count  150000.000000  145494.000000  141320.000000  144019.000000   \n",
       "mean        8.052733       1.792369       0.375842       0.224943   \n",
       "std         7.864956       1.760640       0.548677       0.417546   \n",
       "min         0.000000       0.000000       0.000000       0.000000   \n",
       "25%         1.000000       0.000000       0.000000       0.000000   \n",
       "50%         6.000000       1.000000       0.000000       0.000000   \n",
       "75%        13.000000       3.000000       1.000000       0.000000   \n",
       "max        39.000000       7.000000       6.000000       1.000000   \n",
       "\n",
       "               power      kilometer  ...            v_5            v_6  \\\n",
       "count  150000.000000  150000.000000  ...  150000.000000  150000.000000   \n",
       "mean      119.316547      12.597160  ...       0.248204       0.044923   \n",
       "std       177.168419       3.919576  ...       0.045804       0.051743   \n",
       "min         0.000000       0.500000  ...       0.000000       0.000000   \n",
       "25%        75.000000      12.500000  ...       0.243615       0.000038   \n",
       "50%       110.000000      15.000000  ...       0.257798       0.000812   \n",
       "75%       150.000000      15.000000  ...       0.265297       0.102009   \n",
       "max     19312.000000      15.000000  ...       0.291838       0.151420   \n",
       "\n",
       "                 v_7            v_8            v_9           v_10  \\\n",
       "count  150000.000000  150000.000000  150000.000000  150000.000000   \n",
       "mean        0.124692       0.058144       0.061996      -0.001000   \n",
       "std         0.201410       0.029186       0.035692       3.772386   \n",
       "min         0.000000       0.000000       0.000000      -9.168192   \n",
       "25%         0.062474       0.035334       0.033930      -3.722303   \n",
       "50%         0.095866       0.057014       0.058484       1.624076   \n",
       "75%         0.125243       0.079382       0.087491       2.844357   \n",
       "max         1.404936       0.160791       0.222787      12.357011   \n",
       "\n",
       "                v_11           v_12           v_13           v_14  \n",
       "count  150000.000000  150000.000000  150000.000000  150000.000000  \n",
       "mean        0.009035       0.004813       0.000313      -0.000688  \n",
       "std         3.286071       2.517478       1.288988       1.038685  \n",
       "min        -5.558207      -9.639552      -4.153899      -6.546556  \n",
       "25%        -1.951543      -1.871846      -1.057789      -0.437034  \n",
       "50%        -0.358053      -0.130753      -0.036245       0.141246  \n",
       "75%         1.255022       1.776933       0.942813       0.680378  \n",
       "max        18.819042      13.847792      11.147669       8.658418  \n",
       "\n",
       "[8 rows x 30 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, quantiles) for the numeric columns.\n",
    "Train_data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ee27b4be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['SaleID', 'name', 'regDate', 'model', 'brand', 'bodyType', 'fuelType',\n",
      "       'gearbox', 'power', 'kilometer', 'regionCode', 'seller', 'offerType',\n",
      "       'creatDate', 'price', 'v_0', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6',\n",
      "       'v_7', 'v_8', 'v_9', 'v_10', 'v_11', 'v_12', 'v_13', 'v_14'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "# Names of every training column whose dtype is not 'object'.\n",
    "numerical_cols = Train_data.select_dtypes(exclude = 'object').columns\n",
    "print(numerical_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8be54f0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['notRepairedDamage'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "# Names of the training columns stored with dtype 'object'.\n",
    "categorical_cols = Train_data.select_dtypes(include = 'object').columns\n",
    "print(categorical_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "377ec4db",
   "metadata": {},
   "outputs": [],
   "source": [
    "def Sta_inf(data):\n",
    "    \"\"\"Print summary statistics of ``data`` computed with NumPy reductions.\n",
    "\n",
    "    One line is printed per statistic, in this order: minimum, maximum, mean,\n",
    "    peak-to-peak range, standard deviation and variance. Nothing is returned.\n",
    "    \"\"\"\n",
    "    # Labels are reproduced verbatim (note that only '_max:' carries a colon).\n",
    "    reductions = (\n",
    "        ('_min', np.min),\n",
    "        ('_max:', np.max),\n",
    "        ('_mean', np.mean),\n",
    "        ('_ptp', np.ptp),\n",
    "        ('_std', np.std),\n",
    "        ('_var', np.var),\n",
    "    )\n",
    "    for label, reduce_fn in reductions:\n",
    "        print(label, reduce_fn(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ea0da452",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model features: every numeric column except the explicitly listed ones\n",
    "# and any column whose name contains 'Type'.\n",
    "feature_cols = [\n",
    "    col\n",
    "    for col in numerical_cols\n",
    "    if col not in ('SaleID', 'name', 'regDate', 'creatDate', 'price', 'model', 'brand', 'regionCode', 'seller')\n",
    "    and 'Type' not in col\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "03d35998",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X train shape (150000, 18)\n",
      "X test shape (50000, 18)\n"
     ]
    }
   ],
   "source": [
    "# Training features / target, plus the same feature columns from the test set.\n",
    "Y_data = Train_data['price']\n",
    "X_data = Train_data.loc[:, feature_cols]\n",
    "X_test = TestA_data.loc[:, feature_cols]\n",
    "print(f'X train shape {X_data.shape}')\n",
    "print(f'X test shape {X_test.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "95b5508c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sta of label:\n",
      "_min 11\n",
      "_max: 99999\n",
      "_mean 5923.327333333334\n",
      "_ptp 99988\n",
      "_std 7501.973469876438\n",
      "_var 56279605.94272992\n"
     ]
    }
   ],
   "source": [
    "# Summary statistics of the training target (the 'price' column held in Y_data).\n",
    "print('Sta of label:')\n",
    "Sta_inf(Y_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4fb85169",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAD4CAYAAAD7CAEUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUyElEQVR4nO3df6zd9X3f8edrdkNIMogNBrk2momwugHalmAR0kxVVHfgJlHMHyA5WobXerLG2JZ2kzp7+QOtlSXYqtKhDVYUKIZmgOdmw0rEEsu0qiZRk5smKz9dbkMGt7j4dmaUdYLF9L0/zucq595cX/tzz8XX9/r5kI7O9/v+fj7f8/ngxC++38/3HFJVSJJ0uv7KYg9AkrS0GBySpC4GhySpi8EhSepicEiSuqxc7AEstIsvvrg2bNiw2MOQpCXl29/+9p9V1ZrTabvsgmPDhg2MjY0t9jAkaUlJ8j9Pt623qiRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldlt03x0e1YdfXF+Vzv3/HZxblcyWpl1cckqQuBockqcspgyPJA0mOJXl2qPZvk7yY5A+T/JckHx46tjvJeJIjSW4Yql+T5Jl27O4kafXzkjzW6oeTbBjqsz3JS+21faEmLUmav9O54ngQ2DKjdhC4uqr+JvBHwG6AJFcC24CrWp97kqxofe4FdgIb22vqnDuAN6rqCuAu4M52rtXA7cDHgWuB25Os6p+iJGkhnTI4qur3gOMzat+sqhNt9/eB9W17K/BoVb1TVS8D48C1SdYCF1TVU1VVwEPAjUN99rbt/cDmdjVyA3Cwqo5X1RsMwmpmgEmSzrCFWOP4eeCJtr0OeHXo2ESrrWvbM+vT+rQwehO4aI5z/YgkO5OMJRmbnJwcaTKSpLmNFBxJvgScAL4yVZqlWc1Rn2+f6cWq+6pqU1VtWrPmtP4DVpKkeZp3cLTF6s8Cf6/dfoLBVcFlQ83WA6+1+vpZ6tP6JFkJXMjg1tjJziVJWkTzCo4kW4B/CXyuqv7v0KEDwLb2pNTlDBbBn66qo8BbSa5r6xe3AI8P9Zl6Yuom4MkWRN8Ark+yqi2KX99qkqRFdMpvjid5BPgUcHGSCQZPOu0GzgMOtqdqf7+q/lFVPZdkH/A8g1tYt1XVu+1UtzJ4Qut8BmsiU+si9wMPJxlncKWxDaCqjif5FeBbrd0vV9W0RXpJ0pl3yuCoqs/PUr5/jvZ7gD2z1MeAq2epvw3cfJJzPQA8cKoxSpLOHL85LknqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqcsrgSPJAkmNJnh2qrU5yMMlL7X3V0LHdScaTHElyw1D9miTPtGN3J0mrn5fksVY/nGTDUJ/t7TNeSrJ9wWYtSZq307nieBDYMqO2CzhUVRuBQ22fJFcC24CrWp97kqxofe4FdgIb22vqnDuAN6rqCuAu4M52rtXA7cDHgWuB24cDSpK0OE4ZHFX1e8DxGeWtwN62vRe4caj+aFW9U1UvA+PAtUnWAhdU1VNVVcBDM/pMnWs/sLldjdwAHKyq41X1BnCQHw0wSdIZNt81jkur6ihAe7+k1dcBrw61m2i1dW17Zn1an6o6AbwJXDTHuX5Ekp1JxpKMTU5OznNKkqTTsdCL45mlVnPU59tnerHqvqraVFWb1qxZc1oDlSTNz3yD4/V2+4n2fqzVJ4DLhtqtB15r9fWz1Kf1SbISuJDBrbGTnUuStIjmGxwHgKmnnLYDjw/Vt7UnpS5nsAj+dLud9VaS69r6xS0z+kyd6ybgybYO8g3g+iSr2qL49a0mSVpEK0/VIMkjwKeAi5NMMHjS6Q5gX5IdwCvA
zQBV9VySfcDzwAngtqp6t53qVgZPaJ0PPNFeAPcDDycZZ3Clsa2d63iSXwG+1dr9clXNXKSXJJ1hpwyOqvr8SQ5tPkn7PcCeWepjwNWz1N+mBc8sxx4AHjjVGCVJZ47fHJckdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktRlpOBI8otJnkvybJJHkrw/yeokB5O81N5XDbXfnWQ8yZEkNwzVr0nyTDt2d5K0+nlJHmv1w0k2jDJeSdLo5h0cSdYB/wzYVFVXAyuAbcAu4FBVbQQOtX2SXNmOXwVsAe5JsqKd7l5gJ7Cxvba0+g7gjaq6ArgLuHO+45UkLYxRb1WtBM5PshL4APAasBXY247vBW5s21uBR6vqnap6GRgHrk2yFrigqp6qqgIemtFn6lz7gc1TVyOSpMUx7+Coqj8BfhV4BTgKvFlV3wQuraqjrc1R4JLWZR3w6tApJlptXdueWZ/Wp6pOAG8CF80cS5KdScaSjE1OTs53SpKk0zDKrapVDK4ILgd+HPhgki/M1WWWWs1Rn6vP9ELVfVW1qao2rVmzZu6BS5JGMsqtqp8BXq6qyar6AfBV4CeB19vtJ9r7sdZ+ArhsqP96Bre2Jtr2zPq0Pu122IXA8RHGLEka0SjB8QpwXZIPtHWHzcALwAFge2uzHXi8bR8AtrUnpS5nsAj+dLud9VaS69p5bpnRZ+pcNwFPtnUQSdIiWTnfjlV1OMl+4A+AE8B3gPuADwH7kuxgEC43t/bPJdkHPN/a31ZV77bT3Qo8CJwPPNFeAPcDDycZZ3ClsW2+45UkLYx5BwdAVd0O3D6j/A6Dq4/Z2u8B9sxSHwOunqX+Ni14JElnB785LknqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqctIwZHkw0n2J3kxyQtJPpFkdZKDSV5q76uG2u9OMp7kSJIbhurXJHmmHbs7SVr9vCSPtfrhJBtGGa8kaXSjXnH8O+C/VdVfB/4W8AKwCzhUVRuBQ22fJFcC24CrgC3APUlWtPPcC+wENrbXllbfAbxRVVcAdwF3jjheSdKI5h0cSS4Afgq4H6Cq/l9V/W9gK7C3NdsL3Ni2twKPVtU7VfUyMA5cm2QtcEFVPVVVBTw0o8/UufYDm6euRiRJi2OUK46PAJPAbyb5TpIvJ/kgcGlVHQVo75e09uuAV4f6T7TaurY9sz6tT1WdAN4ELhphzJKkEY0SHCuBjwH3VtVHgb+g3ZY6idmuFGqO+lx9pp842ZlkLMnY5OTk3KOWJI1klOCYACaq6nDb388gSF5vt59o78eG2l821H898Fqrr5+lPq1PkpXAhcDxmQOpqvuqalNVbVqzZs0IU5Ikncq8g6Oq/hR4NclPtNJm4HngALC91bYDj7ftA8C29qTU5QwWwZ9ut7PeSnJdW7+4ZUafqXPdBDzZ1kEkSYtk5Yj9/ynwlSTvA74H/ByDMNqXZAfwCnAzQFU9l2Qfg3A5AdxWVe+289wKPAicDzzRXjBYeH84yTiDK41tI45XkjSikYKjqr4LbJrl0OaTtN8D7JmlPgZcPUv9bVrwSJLODn5zXJLUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUZeTgSLIiyXeSfK3tr05yMMlL7X3VUNvdScaTHElyw1D9miTP
tGN3J0mrn5fksVY/nGTDqOOVJI1mIa44vgi8MLS/CzhUVRuBQ22fJFcC24CrgC3APUlWtD73AjuBje21pdV3AG9U1RXAXcCdCzBeSdIIRgqOJOuBzwBfHipvBfa27b3AjUP1R6vqnap6GRgHrk2yFrigqp6qqgIemtFn6lz7gc1TVyOSpMUx6hXHrwO/BPzlUO3SqjoK0N4vafV1wKtD7SZabV3bnlmf1qeqTgBvAhfNHESSnUnGkoxNTk6OOCVJ0lzmHRxJPgscq6pvn26XWWo1R32uPtMLVfdV1aaq2rRmzZrTHI4kaT5WjtD3k8DnknwaeD9wQZLfAl5PsraqjrbbUMda+wngsqH+64HXWn39LPXhPhNJVgIXAsdHGLMkaUTzvuKoqt1Vtb6qNjBY9H6yqr4AHAC2t2bbgcfb9gFgW3tS6nIGi+BPt9tZbyW5rq1f3DKjz9S5bmqf8SNXHJKkM2eUK46TuQPYl2QH8ApwM0BVPZdkH/A8cAK4rarebX1uBR4EzgeeaC+A+4GHk4wzuNLY9h6MV5LUYUGCo6p+F/jdtv2/gM0nabcH2DNLfQy4epb627TgkSSdHfzmuCSpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC7zDo4klyX5nSQvJHkuyRdbfXWSg0leau+rhvrsTjKe5EiSG4bq1yR5ph27O0la/bwkj7X64SQbRpirJGkBjHLFcQL4F1X1N4DrgNuSXAnsAg5V1UbgUNunHdsGXAVsAe5JsqKd615gJ7Cxvba0+g7gjaq6ArgLuHOE8UqSFsC8g6OqjlbVH7Ttt4AXgHXAVmBva7YXuLFtbwUerap3quplYBy4Nsla4IKqeqqqCnhoRp+pc+0HNk9djUiSFseCrHG0W0gfBQ4Dl1bVURiEC3BJa7YOeHWo20SrrWvbM+vT+lTVCeBN4KJZPn9nkrEkY5OTkwsxJUnSSYwcHEk+BPw28AtV9edzNZ2lVnPU5+ozvVB1X1VtqqpNa9asOdWQJUkjWDlK5yQ/xiA0vlJVX23l15Osraqj7TbUsVafAC4b6r4eeK3V189SH+4zkWQlcCFwfJQxn6027Pr6on329+/4zKJ9tqSlZ5SnqgLcD7xQVb82dOgAsL1tbwceH6pva09KXc5gEfzpdjvrrSTXtXPeMqPP1LluAp5s6yCSpEUyyhXHJ4G/DzyT5Lut9q+AO4B9SXYArwA3A1TVc0n2Ac8zeCLrtqp6t/W7FXgQOB94or1gEEwPJxlncKWxbYTxSpIWwLyDo6r+O7OvQQBsPkmfPcCeWepjwNWz1N+mBY8k6ezgN8clSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHVZudgD0OLbsOvri/K537/jM4vyuZJG4xWHJKmLwSFJ6mJwSJK6GBySpC5LIjiSbElyJMl4kl2LPR5JOped9U9VJVkB/Afg7wITwLeSHKiq5xd3ZBqVT3NJS9NZHxzAtcB4VX0PIMmjwFbA4NC8LFZgLSbDUgtpKQTHOuDVof0J4OPDDZLsBHa23f+T5MgIn3cx8Gcj9F+KzrU5n2vzJXeee3PmHPxzZrQ5/7XTbbgUgiOz1GraTtV9wH0L8mHJWFVtWohzLRXn2pzPtfmCcz5XnKk5L4XF8QngsqH99cBrizQWSTrnLYXg+BawMcnlSd4HbAMOLPKYJOmcddbfqqqqE0n+CfANYAXwQFU99x5+5ILc8lpizrU5n2vzBed8rjgjc05VnbqVJEnNUrhVJUk6ixgckqQuBkezlH/WJMllSX4nyQtJnkvyxVZf
neRgkpfa+6qhPrvbXI8kuWGofk2SZ9qxu5Ok1c9L8lirH06y4YxPdBZJViT5TpKvtf1lPeckH06yP8mL7c/7E+fAnH+x/e/62SSPJHn/cptzkgeSHEvy7FDtjMwxyfb2GS8l2X5aA66qc/7FYNH9j4GPAO8D/gdw5WKPq2P8a4GPte2/CvwRcCXwb4Bdrb4LuLNtX9nmeB5weZv7inbsaeATDL4/8wTws63+j4H/2La3AY8t9rzbWP458J+Ar7X9ZT1nYC/wD9v2+4APL+c5M/gC8MvA+W1/H/APltucgZ8CPgY8O1R7z+cIrAa+195Xte1VpxzvYv8f4Wx4tX/Q3xja3w3sXuxxjTCfxxn8ttcRYG2rrQWOzDY/Bk+sfaK1eXGo/nngN4bbtO2VDL6dmkWe53rgEPDT/DA4lu2cgQsY/CWaGfXlPOepX45Y3cbzNeD65ThnYAPTg+M9n+Nwm3bsN4DPn2qs3qoamO1nTdYt0lhG0i5BPwocBi6tqqMA7f2S1uxk813XtmfWp/WpqhPAm8BF78kkTt+vA78E/OVQbTnP+SPAJPCb7fbcl5N8kGU856r6E+BXgVeAo8CbVfVNlvGch5yJOc7r7z6DY+CUP2uyFCT5EPDbwC9U1Z/P1XSWWs1Rn6vPokjyWeBYVX37dLvMUltSc2bwb4ofA+6tqo8Cf8HgFsbJLPk5t/v6Wxnckvlx4INJvjBXl1lqS2rOp2Eh5zivuRscA0v+Z02S/BiD0PhKVX21lV9PsrYdXwsca/WTzXeibc+sT+uTZCVwIXB84Wdy2j4JfC7J94FHgZ9O8lss7zlPABNVdbjt72cQJMt5zj8DvFxVk1X1A+CrwE+yvOc85UzMcV5/9xkcA0v6Z03akxP3Ay9U1a8NHToATD0lsZ3B2sdUfVt70uJyYCPwdLscfivJde2ct8zoM3Wum4Anq90UXQxVtbuq1lfVBgZ/Xk9W1RdY3nP+U+DVJD/RSpsZ/OcFlu2cGdyiui7JB9pYNwMvsLznPOVMzPEbwPVJVrWru+tbbW5negHobH0Bn2bwNNIfA19a7PF0jv3vMLi8/EPgu+31aQb3MA8BL7X31UN9vtTmeoT25EWrbwKebcf+PT/8dYH3A/8ZGGfw5MZHFnveQ2P+FD9cHF/Wcwb+NjDW/qz/K4MnYZb7nP818GIb78MMniZaVnMGHmGwhvMDBlcBO87UHIGfb/Vx4OdOZ7z+5IgkqYu3qiRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTl/wPrJLiZ14MdEAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of Y_data, drawn on an explicitly created figure/axes pair.\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(Y_data)\n",
    "plt.show()\n",
    "plt.close(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0ada64f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace every missing value in both feature frames with -1.\n",
    "X_data, X_test = (frame.fillna(-1) for frame in (X_data, X_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3d1c0485",
   "metadata": {},
   "outputs": [],
   "source": [
    "# XGBoost regressor that the cross-validation loop fits on each fold.\n",
    "xgr = xgb.XGBRegressor(\n",
    "    n_estimators=120,\n",
    "    learning_rate=0.1,\n",
    "    gamma=0,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.9,\n",
    "    max_depth=7,\n",
    ")\n",
    "\n",
    "# Per-fold MAE on the training part and on the held-out part.\n",
    "scores_train, scores = [], []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "16788e18",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train mae: 622.836567743063\n",
      "Val mae 714.0856746034109\n"
     ]
    }
   ],
   "source": [
    "# Start from empty score lists so that re-running this cell does not append\n",
    "# to results left over from an earlier execution.\n",
    "scores_train = []\n",
    "scores = []\n",
    "\n",
    "# NOTE(review): StratifiedKFold stratifies on Y_data, which is used as a\n",
    "# regression target here; plain KFold may be the better fit - confirm.\n",
    "sk = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)\n",
    "for train_ind, val_ind in sk.split(X_data, Y_data):\n",
    "    train_x = X_data.iloc[train_ind].values\n",
    "    train_y = Y_data.iloc[train_ind]\n",
    "    val_x = X_data.iloc[val_ind].values\n",
    "    val_y = Y_data.iloc[val_ind]\n",
    "\n",
    "    # Fit on this fold's training part, then predict both parts.\n",
    "    xgr.fit(train_x, train_y)\n",
    "    pred_train_xgb = xgr.predict(train_x)\n",
    "    pred_xgb = xgr.predict(val_x)\n",
    "\n",
    "    score_train = mean_absolute_error(train_y, pred_train_xgb)\n",
    "    scores_train.append(score_train)\n",
    "    score = mean_absolute_error(val_y, pred_xgb)\n",
    "    scores.append(score)\n",
    "\n",
    "# Report the mean over all folds for both the training and the held-out MAE.\n",
    "print('Train mae:', np.mean(scores_train))\n",
    "print('Val mae', np.mean(scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "3256a7d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_model_xgb(x_train,y_train):\n",
    "    \"\"\"Fit an XGBoost regressor on (x_train, y_train) and return the fitted model.\"\"\"\n",
    "    params = dict(\n",
    "        n_estimators=150,\n",
    "        learning_rate=0.1,\n",
    "        gamma=0,\n",
    "        subsample=0.8,\n",
    "        colsample_bytree=0.9,\n",
    "        max_depth=7,\n",
    "    )\n",
    "    regressor = xgb.XGBRegressor(**params)\n",
    "    regressor.fit(x_train, y_train)\n",
    "    return regressor\n",
    "\n",
    "def build_model_lgb(x_train,y_train):\n",
    "    \"\"\"Grid-search the LightGBM learning rate and return the fitted search object.\"\"\"\n",
    "    learning_rates = [0.01, 0.05, 0.1, 0.2]\n",
    "    base_model = lgb.LGBMRegressor(num_leaves=127, n_estimators=150)\n",
    "    # The returned GridSearchCV object is what callers call .predict() on.\n",
    "    search = GridSearchCV(base_model, {'learning_rate': learning_rates})\n",
    "    search.fit(x_train, y_train)\n",
    "    return search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "66525ac4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed seed so the hold-out split, and the MAE-based ensemble weights that\n",
    "# are derived from it, are the same on every run.\n",
    "x_train, x_val, y_train, y_val = train_test_split(\n",
    "    X_data, Y_data, test_size=0.3, random_state=0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d5e17073",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train lgb...\n",
      "MAE of val with lgb: 684.6019056983067\n",
      "Predict lgb...\n",
      "Sta of Predict lgb:\n",
      "_min -589.8793550785414\n",
      "_max: 90760.26063584947\n",
      "_mean 5906.935218383807\n",
      "_ptp 91350.13999092802\n",
      "_std 7344.644970956768\n",
      "_var 53943809.749400534\n"
     ]
    }
   ],
   "source": [
    "# Hold-out evaluation: fit on the training split, score on the validation split.\n",
    "print('Train lgb...')\n",
    "model_lgb = build_model_lgb(x_train, y_train)\n",
    "val_lgb = model_lgb.predict(x_val)\n",
    "MAE_lgb = mean_absolute_error(y_val, val_lgb)\n",
    "print('MAE of val with lgb:', MAE_lgb)\n",
    "\n",
    "# Refit on the full X_data / Y_data, predict X_test and print summary statistics.\n",
    "print('Predict lgb...')\n",
    "model_lgb_pre = build_model_lgb(X_data, Y_data)\n",
    "subA_lgb = model_lgb_pre.predict(X_test)\n",
    "print('Sta of Predict lgb:')\n",
    "Sta_inf(subA_lgb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "8867f144",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train xgb...\n",
      "MAE of val with xgb: 705.6513858358384\n",
      "Predict xgb...\n",
      "Sta of Predict xgb:\n",
      "_min -318.20892\n",
      "_max: 90140.625\n",
      "_mean 5910.7607\n",
      "_ptp 90458.836\n",
      "_std 7345.965\n",
      "_var 53963196.0\n"
     ]
    }
   ],
   "source": [
    "# Hold-out evaluation: fit on the training split, score on the validation split.\n",
    "print('Train xgb...')\n",
    "model_xgb = build_model_xgb(x_train, y_train)\n",
    "val_xgb = model_xgb.predict(x_val)\n",
    "MAE_xgb = mean_absolute_error(y_val, val_xgb)\n",
    "print('MAE of val with xgb:', MAE_xgb)\n",
    "\n",
    "# Refit on the full X_data / Y_data, predict X_test and print summary statistics.\n",
    "print('Predict xgb...')\n",
    "model_xgb_pre = build_model_xgb(X_data, Y_data)\n",
    "subA_xgb = model_xgb_pre.predict(X_test)\n",
    "print('Sta of Predict xgb:')\n",
    "Sta_inf(subA_xgb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f3fad0cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MAE of val with Weighted ensemble: 679.7277056741226\n"
     ]
    }
   ],
   "source": [
    "# Each model's weight is one minus its share of the combined validation MAE.\n",
    "weight_lgb = 1 - MAE_lgb / (MAE_xgb + MAE_lgb)\n",
    "weight_xgb = 1 - MAE_xgb / (MAE_xgb + MAE_lgb)\n",
    "val_Weighted = weight_lgb * val_lgb + weight_xgb * val_xgb\n",
    "\n",
    "# Negative blended predictions are replaced by 10 before scoring.\n",
    "val_Weighted[val_Weighted < 0] = 10\n",
    "print('MAE of val with Weighted ensemble:', mean_absolute_error(y_val, val_Weighted))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "22250fd7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAD4CAYAAAD7CAEUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUyElEQVR4nO3df6zd9X3f8edrdkNIMogNBrk2momwugHalmAR0kxVVHfgJlHMHyA5WobXerLG2JZ2kzp7+QOtlSXYqtKhDVYUKIZmgOdmw0rEEsu0qiZRk5smKz9dbkMGt7j4dmaUdYLF9L0/zucq595cX/tzz8XX9/r5kI7O9/v+fj7f8/ngxC++38/3HFJVSJJ0uv7KYg9AkrS0GBySpC4GhySpi8EhSepicEiSuqxc7AEstIsvvrg2bNiw2MOQpCXl29/+9p9V1ZrTabvsgmPDhg2MjY0t9jAkaUlJ8j9Pt623qiRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldlt03x0e1YdfXF+Vzv3/HZxblcyWpl1cckqQuBockqcspgyPJA0mOJXl2qPZvk7yY5A+T/JckHx46tjvJeJIjSW4Yql+T5Jl27O4kafXzkjzW6oeTbBjqsz3JS+21faEmLUmav9O54ngQ2DKjdhC4uqr+JvBHwG6AJFcC24CrWp97kqxofe4FdgIb22vqnDuAN6rqCuAu4M52rtXA7cDHgWuB25Os6p+iJGkhnTI4qur3gOMzat+sqhNt9/eB9W17K/BoVb1TVS8D48C1SdYCF1TVU1VVwEPAjUN99rbt/cDmdjVyA3Cwqo5X1RsMwmpmgEmSzrCFWOP4eeCJtr0OeHXo2ESrrWvbM+vT+rQwehO4aI5z/YgkO5OMJRmbnJwcaTKSpLmNFBxJvgScAL4yVZqlWc1Rn2+f6cWq+6pqU1VtWrPmtP4DVpKkeZp3cLTF6s8Cf6/dfoLBVcFlQ83WA6+1+vpZ6tP6JFkJXMjg1tjJziVJWkTzCo4kW4B/CXyuqv7v0KEDwLb2pNTlDBbBn66qo8BbSa5r6xe3AI8P9Zl6Yuom4MkWRN8Ark+yqi2KX99qkqRFdMpvjid5BPgUcHGSCQZPOu0GzgMOtqdqf7+q/lFVPZdkH/A8g1tYt1XVu+1UtzJ4Qut8BmsiU+si9wMPJxlncKWxDaCqjif5FeBbrd0vV9W0RXpJ0pl3yuCoqs/PUr5/jvZ7gD2z1MeAq2epvw3cfJJzPQA8cKoxSpLOHL85LknqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqcsrgSPJAkmNJnh2qrU5yMMlL7X3V0LHdScaTHElyw1D9miTPtGN3J0mrn5fksVY/nGTDUJ/t7TNeSrJ9wWYtSZq307nieBDYMqO2CzhUVRuBQ22fJFcC24CrWp97kqxofe4FdgIb22vqnDuAN6rqCuAu4M52rtXA7cDHgWuB24cDSpK0OE4ZHFX1e8DxGeWtwN62vRe4caj+aFW9U1UvA+PAtUnWAhdU1VNVVcBDM/pMnWs/sLldjdwAHKyq41X1BnCQHw0wSdIZNt81jkur6ihAe7+k1dcBrw61m2i1dW17Zn1an6o6AbwJXDTHuX5Ekp1JxpKMTU5OznNKkqTTsdCL45mlVnPU59tnerHqvqraVFWb1qxZc1oDlSTNz3yD4/V2+4n2fqzVJ4DLhtqtB15r9fWz1Kf1SbISuJDBrbGTnUuStIjmGxwHgKmnnLYDjw/Vt7UnpS5nsAj+dLud9VaS69r6xS0z+kyd6ybgybYO8g3g+iSr2qL49a0mSVpEK0/VIMkjwKeAi5NMMHjS6Q5gX5IdwCvA
zQBV9VySfcDzwAngtqp6t53qVgZPaJ0PPNFeAPcDDycZZ3Clsa2d63iSXwG+1dr9clXNXKSXJJ1hpwyOqvr8SQ5tPkn7PcCeWepjwNWz1N+mBc8sxx4AHjjVGCVJZ47fHJckdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktRlpOBI8otJnkvybJJHkrw/yeokB5O81N5XDbXfnWQ8yZEkNwzVr0nyTDt2d5K0+nlJHmv1w0k2jDJeSdLo5h0cSdYB/wzYVFVXAyuAbcAu4FBVbQQOtX2SXNmOXwVsAe5JsqKd7l5gJ7Cxvba0+g7gjaq6ArgLuHO+45UkLYxRb1WtBM5PshL4APAasBXY247vBW5s21uBR6vqnap6GRgHrk2yFrigqp6qqgIemtFn6lz7gc1TVyOSpMUx7+Coqj8BfhV4BTgKvFlV3wQuraqjrc1R4JLWZR3w6tApJlptXdueWZ/Wp6pOAG8CF80cS5KdScaSjE1OTs53SpKk0zDKrapVDK4ILgd+HPhgki/M1WWWWs1Rn6vP9ELVfVW1qao2rVmzZu6BS5JGMsqtqp8BXq6qyar6AfBV4CeB19vtJ9r7sdZ+ArhsqP96Bre2Jtr2zPq0Pu122IXA8RHGLEka0SjB8QpwXZIPtHWHzcALwAFge2uzHXi8bR8AtrUnpS5nsAj+dLud9VaS69p5bpnRZ+pcNwFPtnUQSdIiWTnfjlV1OMl+4A+AE8B3gPuADwH7kuxgEC43t/bPJdkHPN/a31ZV77bT3Qo8CJwPPNFeAPcDDycZZ3ClsW2+45UkLYx5BwdAVd0O3D6j/A6Dq4/Z2u8B9sxSHwOunqX+Ni14JElnB785LknqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqctIwZHkw0n2J3kxyQtJPpFkdZKDSV5q76uG2u9OMp7kSJIbhurXJHmmHbs7SVr9vCSPtfrhJBtGGa8kaXSjXnH8O+C/VdVfB/4W8AKwCzhUVRuBQ22fJFcC24CrgC3APUlWtPPcC+wENrbXllbfAbxRVVcAdwF3jjheSdKI5h0cSS4Afgq4H6Cq/l9V/W9gK7C3NdsL3Ni2twKPVtU7VfUyMA5cm2QtcEFVPVVVBTw0o8/UufYDm6euRiRJi2OUK46PAJPAbyb5TpIvJ/kgcGlVHQVo75e09uuAV4f6T7TaurY9sz6tT1WdAN4ELhphzJKkEY0SHCuBjwH3VtVHgb+g3ZY6idmuFGqO+lx9pp842ZlkLMnY5OTk3KOWJI1klOCYACaq6nDb388gSF5vt59o78eG2l821H898Fqrr5+lPq1PkpXAhcDxmQOpqvuqalNVbVqzZs0IU5Ikncq8g6Oq/hR4NclPtNJm4HngALC91bYDj7ftA8C29qTU5QwWwZ9ut7PeSnJdW7+4ZUafqXPdBDzZ1kEkSYtk5Yj9/ynwlSTvA74H/ByDMNqXZAfwCnAzQFU9l2Qfg3A5AdxWVe+289wKPAicDzzRXjBYeH84yTiDK41tI45XkjSikYKjqr4LbJrl0OaTtN8D7JmlPgZcPUv9bVrwSJLODn5zXJLUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUZeTgSLIiyXeSfK3tr05yMMlL7X3VUNvdScaTHElyw1D9miTP
tGN3J0mrn5fksVY/nGTDqOOVJI1mIa44vgi8MLS/CzhUVRuBQ22fJFcC24CrgC3APUlWtD73AjuBje21pdV3AG9U1RXAXcCdCzBeSdIIRgqOJOuBzwBfHipvBfa27b3AjUP1R6vqnap6GRgHrk2yFrigqp6qqgIemtFn6lz7gc1TVyOSpMUx6hXHrwO/BPzlUO3SqjoK0N4vafV1wKtD7SZabV3bnlmf1qeqTgBvAhfNHESSnUnGkoxNTk6OOCVJ0lzmHRxJPgscq6pvn26XWWo1R32uPtMLVfdV1aaq2rRmzZrTHI4kaT5WjtD3k8DnknwaeD9wQZLfAl5PsraqjrbbUMda+wngsqH+64HXWn39LPXhPhNJVgIXAsdHGLMkaUTzvuKoqt1Vtb6qNjBY9H6yqr4AHAC2t2bbgcfb9gFgW3tS6nIGi+BPt9tZbyW5rq1f3DKjz9S5bmqf8SNXHJKkM2eUK46TuQPYl2QH8ApwM0BVPZdkH/A8cAK4rarebX1uBR4EzgeeaC+A+4GHk4wzuNLY9h6MV5LUYUGCo6p+F/jdtv2/gM0nabcH2DNLfQy4epb627TgkSSdHfzmuCSpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC7zDo4klyX5nSQvJHkuyRdbfXWSg0leau+rhvrsTjKe5EiSG4bq1yR5ph27O0la/bwkj7X64SQbRpirJGkBjHLFcQL4F1X1N4DrgNuSXAnsAg5V1UbgUNunHdsGXAVsAe5JsqKd615gJ7Cxvba0+g7gjaq6ArgLuHOE8UqSFsC8g6OqjlbVH7Ttt4AXgHXAVmBva7YXuLFtbwUerap3quplYBy4Nsla4IKqeqqqCnhoRp+pc+0HNk9djUiSFseCrHG0W0gfBQ4Dl1bVURiEC3BJa7YOeHWo20SrrWvbM+vT+lTVCeBN4KJZPn9nkrEkY5OTkwsxJUnSSYwcHEk+BPw28AtV9edzNZ2lVnPU5+ozvVB1X1VtqqpNa9asOdWQJUkjWDlK5yQ/xiA0vlJVX23l15Osraqj7TbUsVafAC4b6r4eeK3V189SH+4zkWQlcCFwfJQxn6027Pr6on329+/4zKJ9tqSlZ5SnqgLcD7xQVb82dOgAsL1tbwceH6pva09KXc5gEfzpdjvrrSTXtXPeMqPP1LluAp5s6yCSpEUyyhXHJ4G/DzyT5Lut9q+AO4B9SXYArwA3A1TVc0n2Ac8zeCLrtqp6t/W7FXgQOB94or1gEEwPJxlncKWxbYTxSpIWwLyDo6r+O7OvQQBsPkmfPcCeWepjwNWz1N+mBY8k6ezgN8clSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHVZudgD0OLbsOvri/K537/jM4vyuZJG4xWHJKmLwSFJ6mJwSJK6GBySpC5LIjiSbElyJMl4kl2LPR5JOped9U9VJVkB/Afg7wITwLeSHKiq5xd3ZBqVT3NJS9NZHxzAtcB4VX0PIMmjwFbA4NC8LFZgLSbDUgtpKQTHOuDVof0J4OPDDZLsBHa23f+T5MgIn3cx8Gcj9F+KzrU5n2vzJXeee3PmHPxzZrQ5/7XTbbgUgiOz1GraTtV9wH0L8mHJWFVtWohzLRXn2pzPtfmCcz5XnKk5L4XF8QngsqH99cBrizQWSTrnLYXg+BawMcnlSd4HbAMOLPKYJOmcddbfqqqqE0n+CfANYAXwQFU99x5+5ILc8lpizrU5n2vzBed8rjgjc05VnbqVJEnNUrhVJUk6ixgckqQuBkezlH/WJMllSX4nyQtJnkvyxVZf
neRgkpfa+6qhPrvbXI8kuWGofk2SZ9qxu5Ok1c9L8lirH06y4YxPdBZJViT5TpKvtf1lPeckH06yP8mL7c/7E+fAnH+x/e/62SSPJHn/cptzkgeSHEvy7FDtjMwxyfb2GS8l2X5aA66qc/7FYNH9j4GPAO8D/gdw5WKPq2P8a4GPte2/CvwRcCXwb4Bdrb4LuLNtX9nmeB5weZv7inbsaeATDL4/8wTws63+j4H/2La3AY8t9rzbWP458J+Ar7X9ZT1nYC/wD9v2+4APL+c5M/gC8MvA+W1/H/APltucgZ8CPgY8O1R7z+cIrAa+195Xte1VpxzvYv8f4Wx4tX/Q3xja3w3sXuxxjTCfxxn8ttcRYG2rrQWOzDY/Bk+sfaK1eXGo/nngN4bbtO2VDL6dmkWe53rgEPDT/DA4lu2cgQsY/CWaGfXlPOepX45Y3cbzNeD65ThnYAPTg+M9n+Nwm3bsN4DPn2qs3qoamO1nTdYt0lhG0i5BPwocBi6tqqMA7f2S1uxk813XtmfWp/WpqhPAm8BF78kkTt+vA78E/OVQbTnP+SPAJPCb7fbcl5N8kGU856r6E+BXgVeAo8CbVfVNlvGch5yJOc7r7z6DY+CUP2uyFCT5EPDbwC9U1Z/P1XSWWs1Rn6vPokjyWeBYVX37dLvMUltSc2bwb4ofA+6tqo8Cf8HgFsbJLPk5t/v6Wxnckvlx4INJvjBXl1lqS2rOp2Eh5zivuRscA0v+Z02S/BiD0PhKVX21lV9PsrYdXwsca/WTzXeibc+sT+uTZCVwIXB84Wdy2j4JfC7J94FHgZ9O8lss7zlPABNVdbjt72cQJMt5zj8DvFxVk1X1A+CrwE+yvOc85UzMcV5/9xkcA0v6Z03akxP3Ay9U1a8NHToATD0lsZ3B2sdUfVt70uJyYCPwdLscfivJde2ct8zoM3Wum4Anq90UXQxVtbuq1lfVBgZ/Xk9W1RdY3nP+U+DVJD/RSpsZ/OcFlu2cGdyiui7JB9pYNwMvsLznPOVMzPEbwPVJVrWru+tbbW5negHobH0Bn2bwNNIfA19a7PF0jv3vMLi8/EPgu+31aQb3MA8BL7X31UN9vtTmeoT25EWrbwKebcf+PT/8dYH3A/8ZGGfw5MZHFnveQ2P+FD9cHF/Wcwb+NjDW/qz/K4MnYZb7nP818GIb78MMniZaVnMGHmGwhvMDBlcBO87UHIGfb/Vx4OdOZ7z+5IgkqYu3qiRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTl/wPrJLiZ14MdEAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Blend the test-set predictions with the same MAE-based weights that are\n",
    "# used for the validation blend.\n",
    "sub_Weighted = (1-MAE_lgb/(MAE_xgb+MAE_lgb))*subA_lgb+(1-MAE_xgb/(MAE_xgb+MAE_lgb))*subA_xgb\n",
    "# Replace negative predictions by 10, exactly as is done for val_Weighted, so\n",
    "# the submitted values are post-processed the same way as the validated ones.\n",
    "sub_Weighted[sub_Weighted < 0] = 10\n",
    "\n",
    "# Plot the distribution of the blended test predictions.\n",
    "plt.hist(sub_Weighted)\n",
    "plt.show()\n",
    "plt.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "dcf531f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Submission table: SaleID from the test set, price from the blended predictions.\n",
    "sub = pd.DataFrame({'SaleID': TestA_data.SaleID, 'price': sub_Weighted})\n",
    "sub.to_csv('./sub_Weighted.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "2f8baeaf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>200000</td>\n",
       "      <td>1177.331633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200001</td>\n",
       "      <td>1806.927512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>200002</td>\n",
       "      <td>8560.770821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>200003</td>\n",
       "      <td>1346.512307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>200004</td>\n",
       "      <td>2074.259059</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID        price\n",
       "0  200000  1177.331633\n",
       "1  200001  1806.927512\n",
       "2  200002  8560.770821\n",
       "3  200003  1346.512307\n",
       "4  200004  2074.259059"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the submission table.\n",
    "sub.head(n=5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:car] *",
   "language": "python",
   "name": "conda-env-car-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
